Time Series

library(tidyverse) # for data wrangling
library(lubridate) # date manipulation
library(TSstudio) # time series interactive viz
library(tseries) # for adf.test
library(astsa)
library(imputeTS)
library(forecast)
library(magrittr)
#### Clean Files ####

# Homogenized data (long MeteoSwiss reference series for Engelberg).
Engelberg_homogenisiert <- read.csv("Daten/Messreihe_Engelberg.csv")
#temp_homo_ts <- ts(summary_data$Avg_Temperature, start = c(1864, 1), frequency = 12)

# Read the raw Engelberg station export; '-' and empty strings mark missing values.
engelberg <- read.csv("Daten/Engelberg_data.csv",sep=';', na.strings=c('-',''))

# Parse the raw YYYYMMDD integer dates, keep only the temperature column
# (tre200nn — assumed to be the daily minimum 2 m air temperature; TODO
# confirm against the MeteoSwiss parameter documentation) and restrict the
# data to 1990 onwards.
engelberg$time <- as.Date(as.character(engelberg$time), format = "%Y%m%d")
engelberg_clean <- engelberg %>%
  select('time', 'tre200nn') %>%
  rename('temp' = 'tre200nn') %>%
  filter(year(time) > 1989)

# Where are they missing?
# Record the dates of the missing observations (kept for inspection).
missingCases <- which(is.na(engelberg_clean$temp))
u <- engelberg_clean$time[missingCases]
# NOTE(review): replacing missing temperatures with 0 °C biases winter
# statistics towards the freezing point — imputeTS::na_interpolation() would
# be a better choice. Kept as-is to preserve the published results.
# (The year filter was already applied above, so it is not repeated here.)
engelberg_clean <- engelberg_clean %>%
  na_replace(fill = 0)

# Daily time series starting 1 January 1990.
# ts() only uses the first two elements of `start` (year, cycle position),
# so the original c(1990, 01, 01) silently ignored the third element; use
# the two-element form explicitly. frequency = 365 ignores leap days, so the
# calendar alignment drifts slightly over 30+ years — acceptable for trend
# visualization.
ts_engelberg <- ts(data = engelberg_clean$temp,
                   start = c(1990, 1),
                   frequency = 365)

Engelberg Data only

Trend Visualizing

# > Daily data 1990-2023 with a linear trend line.
# Regress on decimal years so the fitted line shares the x-axis units of
# plot(ts_engelberg) (fractional years). Regressing directly on the Date
# column — which is numerically days since 1970-01-01 — puts the intercept
# and slope in the wrong units and draws the abline far off scale.
engelberg_clean_trend <- lm(engelberg_clean$temp ~ decimal_date(engelberg_clean$time))

plot(ts_engelberg)
abline(engelberg_clean_trend, col = 'red')

Excursion: Monthly Data

We take only monthly data, selected via a randomized procedure, to see how the MeteoSwiss data and the homogenized data look together.

# >> Monthly data 1990-2023 with the trend line of the homogenized series.
# NOTE(review): replace_na(temp, 0) pulls monthly means towards 0 °C in
# months with missing days — consider interpolation instead.
# Yearly mean temperatures (computed but not used further below).
temp_yr <- engelberg_clean %>%
  mutate(temp_raw=replace_na(temp,0)) %>%
  group_by(Year=year(time)) %>%
  filter(Year>=1990 & Year<=2023) %>%
  summarize(temp_yr=mean(temp_raw)) %>%
  ungroup()
# Monthly mean temperatures over the same window.
temp_mn <- engelberg_clean %>%
  mutate(temp_raw=replace_na(temp,0)) %>%
  group_by(Year=year(time), Month=month(time)) %>%
  filter(Year>=1990 & Year<=2023) %>%
  summarize(temp_mn=mean(temp_raw), .groups='keep') %>%
  ungroup()

# Monthly time series anchored at the first observed year/month.
temp_mn_ts <- ts(temp_mn$temp_mn, start=c(temp_mn$Year[1],temp_mn$Month[1]), frequency=12)
plot(temp_mn_ts)

# Visualization of the trend.
# NOTE(review): the name 'fresh_snow_trend' is misleading — it is a linear
# trend of the monthly mean *temperature* regressed on the year.
fresh_snow_trend <- lm(temp_mn$temp_mn~temp_mn$Year)
# Trend of the homogenized series (columns Temperature/Year — presumably
# yearly values; TODO confirm against the CSV schema).
engelberg_homog_trend <- lm(Engelberg_homogenisiert$Temperature~Engelberg_homogenisiert$Year)
plot(temp_mn_ts, xlab='Year', ylab='Average Temp.')
abline(fresh_snow_trend, col = 'red') # red: trend of the measured monthly means
abline(engelberg_homog_trend, col = 'blue') # blue: trend of the homogenized series

We will be exploring how the time series behaves for the whole year data.

# Yearly data decomposition: classical moving-average decomposition of the
# daily series into trend, seasonal and random (remainder) components.
ts_engelberg_dc <- decompose(ts_engelberg)
plot(ts_engelberg_dc)

# Stationary? Augmented Dickey-Fuller test on the raw daily series.
adf.test(ts_engelberg)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_engelberg
## Dickey-Fuller = -7.2169, Lag order = 22, p-value = 0.01
## alternative hypothesis: stationary
## it is stationary (p-value at the 0.01 reporting floor -> reject unit root)
acf(ts_engelberg)

# ACF of the remainder; na.pass keeps the NAs that decompose() leaves at
# both ends of the centred moving average.
acf(ts_engelberg_dc$random,na.action=na.pass)

# Plot the PACF
pacf(ts_engelberg_dc$random, na.action = na.pass)

# Same PACF captured as an object so the plot can carry a custom title.
PAutoCorrelation <- pacf(ts_engelberg_dc$random, na.action = na.pass, plot=FALSE)
plot(PAutoCorrelation, main = "Whole Year PACF")

# ARIMA
# Fit an AR(1) to the decomposition remainder, motivated by the PACF above.
window <- list(start=1990,end=2023)

# Remainder as a data frame so it can be windowed by decimal date
# (%<>% from magrittr assigns the filtered result back in place).
temp_comp_random_y <- data.frame(Date=time(ts_engelberg_dc$random), Random=ts_engelberg_dc$random)
temp_comp_random_y %<>% filter(Date>=window$start&Date<window$end)
temp_comp_random_y_ts <- ts(temp_comp_random_y$Random)
arima(temp_comp_random_y_ts, c(1,0,0))
## 
## Call:
## arima(x = temp_comp_random_y_ts, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.6528    -0.0089
## s.e.  0.0070     0.0740
## 
## sigma^2 estimated as 7.745:  log likelihood = -28625.83,  aic = 57257.65

The result of this ARIMA model is not a good simulation. Therefore, to focus on our core question — for how many days are the ski resorts able to use artificial snow? — we will be using only the winter months.

Winter

This further investigation only takes into account how many days there have been in each month where the temperature dropped below 0°.

# Winter subset: December-March, 1990 onwards.
engelberg_filtered <- engelberg_clean %>%
  # 'time' is already a Date object (converted during cleaning above).
  filter(year(time) > 1989) %>%
  filter(month(time) %in% c(12, 1, 2, 3)) 
# Count of days below 0 °C per (calendar year, month).
# NOTE(review): a month with zero sub-zero days produces no row at all, so
# the quarterly ts below may be misaligned whenever such a month occurs;
# also December is grouped with January-March of the *same* calendar year,
# not with the winter season it opens — TODO confirm this is intended.
monthly_below_zero <- engelberg_filtered %>%
  filter(temp < 0) %>%
  mutate(year = year(time),
         month = month(time)) %>%
  group_by(year, month) %>%
  summarise(days_below_zero = n(), .groups = "drop") %>% # Drop the grouping
  mutate(year_month = paste(year, month, sep="-")) %>% # Create year-month column
  select(year_month, days_below_zero) # Select only the columns you want

# Turn "YYYY-M" into a proper Date (first day of the month).
monthly_below_zero <- monthly_below_zero %>%
  mutate(year_month = as.Date(paste(year_month, "01", sep="-")))
# Quarterly series: 4 winter months per year.
ts_month <- ts(data = monthly_below_zero$days_below_zero,
                   start = c(1990,01),
                   frequency = 4)
autoplot(ts_month)

# Classical decomposition of the winter-month counts.
ts_month_dc <- decompose(ts_month)
plot(ts_month_dc)

# Stationarity check.
adf.test(ts_month)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_month
## Dickey-Fuller = -4.4889, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

The decomposed time series of monthly days that measured below 0° is stationary.

# ACF of the raw counts and of the decomposition remainder.
acf(ts_month)

acf(ts_month_dc$random,na.action=na.pass)

pacf(ts_month_dc$random,na.action=na.pass)

# ARIMA model: first with forced regular and seasonal first differences
# (d = 1, D = 1), then letting auto.arima() choose the orders freely.
month_auto <- auto.arima(ts_month, D=1, d=1)
month_auto
## Series: ts_month 
## ARIMA(3,1,0)(2,1,0)[4] 
## 
## Coefficients:
##           ar1      ar2      ar3     sar1     sar2
##       -0.6536  -0.4609  -0.1771  -0.8653  -0.4880
## s.e.   0.0884   0.1018   0.1044   0.0903   0.0797
## 
## sigma^2 = 33.01:  log likelihood = -404.83
## AIC=821.67   AICc=822.36   BIC=838.78
month_auto.1 <- auto.arima(ts_month)
month_auto.1
## Series: ts_month 
## ARIMA(0,0,2)(2,0,0)[4] with non-zero mean 
## 
## Coefficients:
##          ma1      ma2    sar1    sar2    mean
##       0.0949  -0.2114  0.2002  0.1514  22.120
## s.e.  0.0883   0.0943  0.0882  0.0980   0.591
## 
## sigma^2 = 26.93:  log likelihood = -405.4
## AIC=822.81   AICc=823.47   BIC=840.15

Forecast of our Arima Model

# Forecast the fitted seasonal ARIMA five winters ahead (5 seasons x 4
# winter months per season) with a 95% prediction interval.
horizon_winters <- 5
f <- forecast(month_auto, level = c(95), h = horizon_winters * 4)

# First look with the default axis limits.
plot(f,
     main = "Forecast for the Next 5 Winter years",
     xlab = "Time",
     ylab = "Forecast")

# Re-plot with fixed y-limits so the whole interval fan stays visible
# (lower bound tracks the forecast mean if it drops below -10).
y_lower <- min(-10, min(f$mean))
y_upper <- 60
plot(f,
     main = "Forecast for the Next 5 Winter years",
     xlab = "Time",
     ylab = "Forecast",
     ylim = c(y_lower, y_upper))

Now we will be analyzing daily data of the winter months

# Daily winter temperatures as a time series.
# NOTE(review): frequency = 365 assumes a full year of daily values, but
# engelberg_filtered holds only ~121 winter days per year, so the seasonal
# period (and the decomposition below) is misspecified — TODO revisit.
ts_daily <- ts(data = engelberg_filtered$temp,
               start = c(1990,01),
               frequency = 365)
autoplot(ts_daily)

# Decompose
ts_daily_dc <- decompose(ts_daily)
plot(ts_daily_dc)

# Stationarity of the daily winter series.
adf.test(ts_daily)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_daily
## Dickey-Fuller = -12.447, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
acf(ts_daily)

# ACF of the remainder; na.pass handles the NAs from decompose().
acf(ts_daily_dc$random,na.action=na.pass)

PAutoCorrelation_m <- pacf(ts_daily_dc$random,na.action=na.pass, plot=FALSE)
plot(PAutoCorrelation_m, main = "Winter month PACF")

# ARIMA model
# Fit an AR(1) to the remainder, motivated by the PACF above.
window <- list(start=1990,end=2023)

# Remainder windowed by decimal date, then rebuilt as a plain ts.
temp_comp_random <- data.frame(Date=time(ts_daily_dc$random), Random=ts_daily_dc$random)
temp_comp_random %<>% filter(Date>=window$start&Date<window$end)
temp_comp_random_ts <- ts(temp_comp_random$Random)

arima(temp_comp_random_ts, c(1,0,0))
## 
## Call:
## arima(x = temp_comp_random_ts, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.6737     0.0204
## s.e.  0.0122     0.1619
## 
## sigma^2 estimated as 10.25:  log likelihood = -9473.05,  aic = 18952.09

Interpretation: The ARIMA model output shown indicates that the best-fitting model for the daily temperature data for a winter location during the winter months is an ARIMA(1,0,0), also known as an AR(1) model. The coefficient for `ar1` is 0.6737, suggesting a positive autocorrelation where a higher temperature on one day is likely to be followed by a higher temperature the next day. The intercept of 0.0204 suggests a very small upward trend in the temperature data, although its standard error is quite large (0.1619) relative to the coefficient value, indicating that this trend is not statistically significant.

One Year Forecast

In the following chapter we will explore the winter months of the year 2000 to see how the forecast behaves. The year 2000 was chosen heuristically because it is neither the warmest winter nor the coldest.

# Winter 2000 only (chosen heuristically: neither the warmest nor the
# coldest winter in the record).
one_engelberg <- engelberg_filtered %>% 
  filter(year(time) == 2000)
         # > 1999 &
         #   year(time) < 2003)

# Winter 2001, kept aside as a comparison horizon for the forecast below.
two_engelberg <- engelberg_filtered %>% 
  filter(year(time) == 2001)
# frequency = 30: roughly one month of daily values per cycle.
ts_one <- ts(data = one_engelberg$temp, frequency = 30)
autoplot(ts_one)

ts_two <- ts(data = two_engelberg$temp, frequency = 30)
ts_one_dc <- decompose(ts_one)
plot(ts_one_dc)

# The raw 2000 series does not reject the unit root (p = 0.18)...
adf.test(ts_one)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_one
## Dickey-Fuller = -2.9608, Lag order = 4, p-value = 0.1775
## alternative hypothesis: stationary
# ...so take first differences; the differenced series passes the ADF test.
ts_one_diff <- na.omit(diff(ts_one))
adf.test(ts_one_diff)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_one_diff
## Dickey-Fuller = -6.0305, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
ts_one_diff_dc <- decompose(ts_one_diff)
plot(ts_one_diff_dc)

acf(ts_one_diff)

acf(ts_one_diff_dc$random,na.action=na.pass)

# PACF of the differenced-and-decomposed remainder, plotted with a title.
PAutoCorrelation_m <- pacf(ts_one_diff_dc$random,
                           na.action=na.pass, plot=FALSE)
plot(PAutoCorrelation_m, main = "1 winter PACF 2000")

window <- list(start=2000,end=2002)

# Remainder of the 2000-only decomposition (the windowing step is disabled).
temp_comp_random_one <- data.frame(Date=time(ts_one_dc$random), Random=ts_one_dc$random)
#temp_comp_random_one %<>% filter(Date>=window$start&Date<window$end)
temp_comp_random_one_ts <- ts(temp_comp_random_one$Random)

# NOTE(review): this fits temp_comp_random_ts — the remainder of the FULL
# daily winter series from above — not the freshly built
# temp_comp_random_one_ts, which is left unused. Likely a copy-paste slip;
# the printed output below does match the full-series fit, so the code is
# left unchanged here. TODO confirm which series was intended.
winter_model_dc <- arima(temp_comp_random_ts, c(1,0,0))
summary(winter_model_dc)
## 
## Call:
## arima(x = temp_comp_random_ts, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.6737     0.0204
## s.e.  0.0122     0.1619
## 
## sigma^2 estimated as 10.25:  log likelihood = -9473.05,  aic = 18952.09
## 
## Training set error measures:
##                         ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.0008668858 3.201432 2.455616 -37.91163 360.3382 0.9182744
##                     ACF1
## Training set -0.02672547

This model is using the decomposed data. The ARIMA(1,0,0) model summary for daily temperatures during winter months shows a significant positive autocorrelation in temperature data, as indicated by the ar1 coefficient of 0.6737. Error metrics such as the Root Mean Squared Error (RMSE) of 3.201432 and Mean Absolute Error (MAE) of 2.455616 suggest the model’s predictions are reasonably close to the actual temperatures, although the Mean Percentage Error (MPE) of -37.91163 indicates a systematic underestimation by the model.

# AR(2) fitted directly to the (undecomposed) winter-2000 series.
winter_model <- arima(ts_one, c(2,0,0))
winter_model
## 
## Call:
## arima(x = ts_one, order = c(2, 0, 0))
## 
## Coefficients:
##          ar1     ar2  intercept
##       0.6454  0.0351    -2.7220
## s.e.  0.0906  0.0910     0.9538
## 
## sigma^2 estimated as 11.7:  log likelihood = -323.46,  aic = 654.91

The ARIMA(2,0,0) model for the daily temperature of a winter location indicates a primary autoregressive component at lag 1 (ar1 coefficient of 0.6454) and a much smaller effect at lag 2 (ar2 coefficient of 0.0351), suggesting that past temperatures have a significant influence on future temperatures, with the most recent day having the strongest effect. The negative intercept of -2.7220 may imply a baseline adjustment from the mean temperature, but its practical significance should be cautiously interpreted due to its relatively large standard error of 0.9538 compared to the coefficient value.

# Forecast as many days ahead as winter 2001 has observations, so the two
# winters could be overlaid for comparison.
f <- forecast(winter_model, level=c(95), h=length(ts_two))
plot(f)

# Now add the actual data points.
#lines(ts_two, col="red")
# Index of the first forecast step (length of the fitted series).
forecast_start <- length(f$model$x)
# lines(seq(forecast_start, forecast_start + length(ts_two) - 1), 
# ts_two, col="red")

The plot displays the forecast from an ARIMA(2,0,0) model with a non-zero mean for future values of a time series, likely representing daily temperatures. The forecast shows a leveling off of the temperature values, with the shaded area representing a 95% confidence interval indicating where future observations are likely to fall, with the uncertainty increasing as the forecast extends further into the future.

Seebodenalp (Rigi)

# Read the Seebodenalp (Küssnacht) station export; '-' and empty strings
# mark missing values.
kuessnacht <- read.csv("Daten/Seebodenalp_data.csv",sep=';', na.strings=c('-',''))

# Parse the YYYYMMDD dates, keep only the temperature column, restrict to
# 1990 onwards and drop incomplete rows. drop_na() removes every row with
# any NA, which is exactly what the previous two-step
# na.omit(x[!is.na(x$temp), ]) construction achieved.
# NOTE(review): Engelberg fills missing temperatures with 0 while rows are
# dropped here — the two stations are handled inconsistently; confirm this
# is intended.
kuessnacht$time <- as.Date(as.character(kuessnacht$time), format = "%Y%m%d")
kuessnacht_clean <- kuessnacht %>%
  select('time', 'tre200nn') %>%
  rename('temp' = 'tre200nn') %>%
  filter(year(time) > 1989) %>%
  drop_na()

Winter

# Winter subset for Seebodenalp: December-March from 1990 onwards.
# ('time' is already a Date after the cleaning step above; the year filter
# is redundant there but kept for safety.)
kuessnacht_filtered <- kuessnacht_clean %>%
  filter(year(time) > 1989,
         month(time) %in% c(12, 1, 2, 3))

Now we will be focusing on monthly days below 0°

# Days below 0 °C per (calendar year, month) for Seebodenalp.
# NOTE(review): this reuses the name 'monthly_below_zero' and overwrites
# the Engelberg object of the same name created earlier.
monthly_below_zero <- kuessnacht_filtered %>%
  filter(temp < 0) %>%
  mutate(year = year(time),
         month = month(time)) %>%
  group_by(year, month) %>%
  summarise(days_below_zero = n(), .groups = "drop") %>% # Drop the grouping
  mutate(year_month = paste(year, month, sep="-")) %>% # Create year-month column
  select(year_month, days_below_zero) # Select only the columns you want

# Proper Date column and quarterly ts, mirroring the Engelberg pipeline.
monthly_below_zero_k <- monthly_below_zero %>%
  mutate(year_month = as.Date(paste(year_month, "01", sep="-")))
ts_month_k <- ts(data = monthly_below_zero_k$days_below_zero,
               start = c(1990,01),
               frequency = 4)
autoplot(ts_month_k)

# Decompose the quarterly winter-month counts.
ts_month_k_dc <- decompose(ts_month_k)
plot(ts_month_k_dc)

# Stationarity of the Seebodenalp counts.
adf.test(ts_month_k)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_month_k
## Dickey-Fuller = -4.0566, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
acf(ts_month_k)

# ACF/PACF of the decomposition remainder; na.pass handles the edge NAs.
acf(ts_month_k_dc$random,na.action=na.pass)

pacf(ts_month_k_dc$random,na.action=na.pass)

# Seasonal ARIMA with forced regular and seasonal differencing, as for
# Engelberg above.
month_auto_k <- auto.arima(ts_month_k, D=1, d=1)
month_auto_k
## Series: ts_month_k 
## ARIMA(0,1,1)(0,1,2)[4] 
## 
## Coefficients:
##           ma1     sma1    sma2
##       -0.8398  -1.2282  0.2582
## s.e.   0.0624   0.1099  0.0862
## 
## sigma^2 = 29.82:  log likelihood = -392.57
## AIC=793.14   AICc=793.47   BIC=804.42

The ARIMA model summary indicates that the best-fitting model for the time series `ts_month_k` is an ARIMA(0,1,1)(0,1,2)[4], which suggests a seasonal model with a non-seasonal differencing of 1 and a seasonal differencing of 1 at a seasonal period of 4. The coefficients for the moving average part (ma1) and the first two seasonal moving average parts (sma1 and sma2) are significant, with ma1 being negative, indicating a smoothing effect from the previous value, and sma1 being strongly negative, which may suggest a corrective effect from the past seasonal cycle. The positive sma2 indicates a smaller smoothing effect from two seasons ago. The relatively low AIC, AICC, and BIC values suggest a model that balances goodness of fit with complexity, although model validation against unseen data is needed to confirm its predictive power.

# Unconstrained auto.arima for comparison (it has a higher AIC than the
# forced-differencing model above).
month_auto_k.1 <- auto.arima(ts_month_k)
month_auto_k.1
## Series: ts_month_k 
## ARIMA(1,0,1) with non-zero mean 
## 
## Coefficients:
##           ar1     ma1     mean
##       -0.5957  0.7870  17.0877
## s.e.   0.1479  0.1077   0.5711
## 
## sigma^2 = 34.39:  log likelihood = -409.78
## AIC=827.55   AICc=827.87   BIC=838.99
# Forecast 5 winters (20 winter months) ahead with the D=1/d=1 model.
f <- forecast(month_auto_k, level=c(95), h=5*4)
# Fix the y-limits so the whole interval fan stays visible.
y_lower <- min(-10, min(f$mean))
y_upper <- 60

plot(f, main = "Forecast for the Next 5 Winter", 
     xlab = "Time", ylab = "Forecast",
     ylim = c(y_lower, y_upper))

We will not be doing any further time series analysis as we did for Engelberg, because for our analysis only the number of monthly days below 0° during the winter months is of importance.

Tourism Data

# Room-occupancy data for both resorts.
EB <- read.csv("Daten/Room_Occupancy_Egelberg.csv")
KS <- read.csv("Daten/Room_Occupancy_Kuessnacht SZ.csv")
# Parse the dates up front so min() compares real Dates rather than strings.
# (as.Date() is a no-op on columns that are already Dates, so the later
# conversion in the winter-filter step stays harmless.)
EB$Date <- as.Date(EB$Date)
KS$Date <- as.Date(KS$Date)
# Create monthly time series.
# With frequency = 12 the second element of `start` must be the month index
# (1-12); the original yday() returned the day of the year (1-365), which
# silently shifted the series start whenever the data did not begin on
# 1 January.
freq_monthly <- 12
Occupancy_Engelberg <-
  ts(EB$Room.occupancy,
     start = c(year(min(EB$Date)), month(min(EB$Date))),
     frequency = freq_monthly) %>%
  na_replace(fill = 0)
Occupancy_Kuessnacht <-
  ts(KS$Room.occupancy,
     start = c(year(min(KS$Date)), month(min(KS$Date))),
     frequency = freq_monthly) %>%
  na_replace(fill = 0)
plot(Occupancy_Engelberg)

plot(Occupancy_Kuessnacht)

# Stationarity tests and decomposition.
# ADF with no lags rejects the unit root...
adf.test(Occupancy_Engelberg,k=0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Occupancy_Engelberg
## Dickey-Fuller = -5.5441, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary
# ...but with 20 lags it does not — the conclusion is lag-order sensitive.
adf.test(Occupancy_Engelberg,k=20)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Occupancy_Engelberg
## Dickey-Fuller = -2.8082, Lag order = 20, p-value = 0.2408
## alternative hypothesis: stationary
# NOTE(review): 'Occupency_*' misspells 'Occupancy_*'; the inconsistent
# names are kept because later blocks reference them.
Occupency_Engelberg_comp=decompose(Occupancy_Engelberg)
plot(Occupency_Engelberg_comp)

Occupency_Kuessnacht_comp=decompose(Occupancy_Kuessnacht)
plot(Occupency_Kuessnacht_comp)

# ADF with a moderate lag order rejects the unit root again.
adf.test(Occupancy_Engelberg,k=3)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Occupancy_Engelberg
## Dickey-Fuller = -4.552, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
# ACF/PACF of the remainder and of the raw Engelberg series.
acf(Occupency_Engelberg_comp$random,na.action=na.pass)

pacf(Occupency_Engelberg_comp$random,na.action=na.pass)

pacf(Occupancy_Engelberg)

adf.test(Occupancy_Kuessnacht,k=3)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Occupancy_Kuessnacht
## Dickey-Fuller = -7.1425, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
# ACF/PACF of the remainder and of the raw Küssnacht series.
acf(Occupency_Kuessnacht_comp$random,na.action=na.pass)

pacf(Occupency_Kuessnacht_comp$random,na.action=na.pass)

pacf(Occupancy_Kuessnacht)

# Remainder components of both occupancy decompositions.
plot(Occupency_Engelberg_comp$random)

plot(Occupency_Kuessnacht_comp$random)

Cross Correlation Engelberg

# Cross-correlation between the remainder components of the two occupancy
# series; NAs from the moving-average decomposition are removed first.
ccf(na.omit(Occupency_Engelberg_comp$random), 
    na.omit(Occupency_Kuessnacht_comp$random))

Remove all non-winter months

# Keep only the winter months (Dec-Mar) in both occupancy tables.
EB$Date <- as.Date(EB$Date)
df_filtered <- EB[format(EB$Date, "%m") %in% c("12", "01", "02", "03"), ]
KS$Date <- as.Date(KS$Date)
df2_filtered <- KS[format(KS$Date, "%m") %in% c("12", "01", "02", "03"), ]
#########################################################################################
# Time series of days-below-zero for Engelberg (pre-computed CSV).
df_temp_EN <- read.csv("Daten/Engelberg_monthly_below_zero.csv")

# NOTE(review): magic number — the first 92 rows are dropped, presumably to
# align the temperature series with the occupancy window; TODO document how
# 92 was derived.
df_temp_EN <- slice(df_temp_EN, -(1:92))

# NOTE(review): row 42 dropped, presumably to equalize series lengths —
# confirm against the data.
df_filtered <- slice(df_filtered, -42)
# Quarterly frequency: 4 winter months per season.
freq_monthly <- 4
Occupancy_Engelberg_season<-
  ts(as.numeric(df_filtered$Room.occupancy), frequency=freq_monthly) %>%
  na_replace(fill=0)
temp_Engelberg <- 
  ts(as.numeric(df_temp_EN$days_below_zero), frequency=freq_monthly) %>%
  na_replace(fill=0)
plot(Occupancy_Engelberg_season)

plot(temp_Engelberg)

# Decompose both series and cross-correlate their remainder components.
Occupancy_Engelberg_season_comp <- decompose(Occupancy_Engelberg_season)
temp_Engelberg_comp <- decompose(temp_Engelberg)
plot(Occupancy_Engelberg_season_comp)

plot(temp_Engelberg_comp)

ccf(temp_Engelberg_comp$random, Occupancy_Engelberg_season_comp$random, na.action = na.pass)

Cross Correlation Küssnacht

# Days-below-zero series for Küssnacht/Seebodenalp (pre-computed CSV).
df_temp_KS <- read.csv("Daten/kuessnacht_monthly_below_zero.csv")

# NOTE(review): magic numbers — drop the last 2 rows, then keep the final
# 42, presumably to match the 42-row occupancy window; TODO document.
df_temp_KS <- head(df_temp_KS, -2)

df_temp_KS <- tail(df_temp_KS, 42)

# Quarterly frequency: 4 winter months per season.
freq_monthly <- 4
Occupancy_Kuessnacht_season<-
  ts(as.numeric(df2_filtered$Room.occupancy), frequency=freq_monthly) %>%
  na_replace(fill=0)
temp_Kuessnacht <- 
  ts(as.numeric(df_temp_KS$days_below_zero), frequency=freq_monthly) %>%
  na_replace(fill=0)
plot(Occupancy_Kuessnacht_season)

plot(temp_Kuessnacht)

# Decompose both series and cross-correlate their remainder components.
Occupancy_Kuessnacht_season_comp <- decompose(Occupancy_Kuessnacht_season)
temp_Kuessnacht_comp <- decompose(temp_Kuessnacht)
plot(Occupancy_Kuessnacht_season_comp)

plot(temp_Kuessnacht_comp)

ccf(temp_Kuessnacht_comp$random, Occupancy_Kuessnacht_season_comp$random, na.action = na.pass)